# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import requests
import csv

# Listing-page URL template; {page} is replaced with the 1-based page number.
url = "http://bj.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"

# Seconds to wait for the server before giving up on a request. Without a
# timeout, requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Number of the most recently requested page (incremented before each fetch).
page = 0

# newline='' lets the csv module control line endings itself; without it
# every row is followed by an empty line on platforms that use \r\n.
with open('rent.csv', 'wt', encoding="utf-8", newline='') as f:
    csv_writer = csv.writer(f, delimiter=',')
    while True:
        page += 1
        page_url = url.format(page=page)
        print('fetch:', page_url)
        response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        html = BeautifulSoup(response.text, 'html.parser')
        house_list = html.select('.list > li')

        # Stop as soon as a page yields no listings.
        if not house_list:
            break

        for house in house_list:
            title_tag = house.select_one('h2')
            link_tag = house.select_one('a[href]')
            money_tag = house.select_one('.money b')

            # Skip list items that lack a title, link or price instead of
            # aborting the whole crawl with an IndexError.
            if title_tag is None or link_tag is None or money_tag is None:
                continue

            # get_text() also works when the tag contains nested markup,
            # where .string would be None.
            house_title = title_tag.get_text()
            house_money = money_tag.get_text()

            # Resolve the link against the URL that was actually fetched,
            # not the unformatted template containing a literal "{page}".
            house_url = urljoin(response.url, link_tag['href'])

            house_info_list = house_title.split()

            if len(house_info_list) < 2:
                # Title has no second field: fall back to the only field,
                # or to an empty location when the title is blank.
                house_location = house_info_list[0] if house_info_list else ''
            elif '公寓' in house_info_list[1] or '青年社区' in house_info_list[1]:
                # If the second field is the apartment name, the first
                # field is used as the address.
                house_location = house_info_list[0]
            else:
                house_location = house_info_list[1]

            csv_writer.writerow([house_title, house_location, house_money, house_url])
